#------------------------------------------------------------------------------
# use combined individual level data to generate
# Table S5 : demographic comparisons
#==============================================================================

load_library = c('bit64','data.table','fst','future.apply','stringr','logger','vroom','rio','stringr')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))

bucket = '/N/project/iuni_doctorshopping/'

ind_combined = read_fst(file.path(bucket,'projects','covid_opioid','data','processed_data',
	'weekly_ses_individuals.fst'), as.data.table=TRUE)

# ___ Table S5 
#object.size(sum_ind_combined) %>% format('Gb')
ind_combined[, p.insurance_EPO := ifelse(PRODUCT == 'EPO', 1, 0)]
ind_combined[, p.insurance_HMO := ifelse(PRODUCT == 'HMO', 1, 0)]
ind_combined[, p.insurance_IND := ifelse(PRODUCT == 'IND', 1, 0)]
ind_combined[, p.insurance_OTH := ifelse(PRODUCT == 'OTH', 1, 0)]
ind_combined[, p.insurance_POS := ifelse(PRODUCT == 'POS', 1, 0)]
ind_combined[, p.insurance_PPO := ifelse(PRODUCT == 'PPO', 1, 0)]
ind_combined[, p.medicare_priv := ifelse(medicare == 'private', 1, 0)]
ind_combined[, p.medicare_dual := ifelse(medicare == 'dual', 1, 0)]
ind_combined[, p.medicare_lis  :=ifelse(medicare == 'lis', 1, 0)]
ind_combined[, p.medicare_oth  :=ifelse(medicare == 'other', 1, 0)]
	
# sum
demo_ind_combined = ind_combined[!is.na(period), lapply(.SD, sum, na.rm=TRUE), 
	.SDcols = c('n_week',
			'p.insurance_EPO','p.insurance_HMO','p.insurance_IND','p.insurance_OTH','p.insurance_POS','p.insurance_PPO',
			'p.medicare_priv','p.medicare_dual','p.medicare_lis','p.medicare_oth',
			'targetpain','backpain','neckpain','limbpain',
			'opioids','therapy','sum_opioid_days','sum_opioid_mme'), 
	by=c('PATID','female','YRDOB', 'year')]

#demo_ind_combined[, .N, by='PATID'][,table(N)]
#9255387 / (9255387 + 16469602)

demo_ind_combined = merge(demo_ind_combined, ses_data, by='PATID', all.x=TRUE)

demo_ind_combined[YRDOB > 0, age := as.integer(year - YRDOB + 1)]
demo_ind_combined[age >= 92, age := 90]
demo_ind_combined[is.na(race), race := 'unknown']

tab_demo_all = demo_ind_combined[, .(
	n.enrolled 		= .N,
	n.weekly_visit  = sum(n_week, na.rm=TRUE), 
	n.age 			= mean(age, na.rm=TRUE),
	n.female 		= sum(female, na.rm=TRUE),

	n.race_white	= sum(race == 'White', na.rm=TRUE),
	n.race_black	= sum(race == 'Black', na.rm=TRUE),
	n.race_hispanic	= sum(race == 'Hispanic', na.rm=TRUE),
	n.race_asian	= sum(race == 'Asian', na.rm=TRUE),
	n.race_unk		= sum(race == 'unknown', na.rm=TRUE),

	p.age 			= sd(age, na.rm=TRUE),
	p.female 		= mean(female, na.rm=TRUE),
	p.race_white	= mean(race == 'White', na.rm=TRUE),
	p.race_black	= mean(race == 'Black', na.rm=TRUE),
	p.race_hispanic	= mean(race == 'Hispanic', na.rm=TRUE),
	p.race_asian	= mean(race == 'Asian', na.rm=TRUE),
	p.race_unk		= mean(race == 'unknown', na.rm=TRUE),

	p.insurance_EPO = mean(p.insurance_EPO > 0, na.rm=TRUE),
	p.insurance_HMO = mean(p.insurance_HMO > 0, na.rm=TRUE),
	p.insurance_IND = mean(p.insurance_IND > 0, na.rm=TRUE),
	p.insurance_OTH = mean(p.insurance_OTH > 0, na.rm=TRUE),
	p.insurance_POS = mean(p.insurance_POS > 0, na.rm=TRUE),
	p.insurance_PPO = mean(p.insurance_PPO > 0, na.rm=TRUE),
	p.medicare_priv = mean(p.medicare_priv > 0, na.rm=TRUE),
	p.medicare_dual = mean(p.medicare_dual > 0, na.rm=TRUE),
	p.medicare_lis  = mean(p.medicare_lis > 0, na.rm=TRUE),
	p.medicare_oth  = mean(p.medicare_oth > 0, na.rm=TRUE),
	
	p.pain_all  	= mean(targetpain > 0, na.rm=TRUE),
	p.pain_back 	= mean(backpain > 0, na.rm=TRUE),
	p.pain_neck 	= mean(neckpain > 0, na.rm=TRUE),
	p.pain_limb 	= mean(limbpain > 0, na.rm=TRUE),

	n.insurance_EPO = sum(p.insurance_EPO > 0, na.rm=TRUE),
	n.insurance_HMO = sum(p.insurance_HMO > 0, na.rm=TRUE),
	n.insurance_IND = sum(p.insurance_IND > 0, na.rm=TRUE),
	n.insurance_OTH = sum(p.insurance_OTH > 0, na.rm=TRUE),
	n.insurance_POS = sum(p.insurance_POS > 0, na.rm=TRUE),
	n.insurance_PPO = sum(p.insurance_PPO > 0, na.rm=TRUE),
	n.medicare_priv = sum(p.medicare_priv > 0, na.rm=TRUE),
	n.medicare_dual = sum(p.medicare_dual > 0, na.rm=TRUE),
	n.medicare_lis  = sum(p.medicare_lis > 0, na.rm=TRUE),
	n.medicare_oth  = sum(p.medicare_oth > 0, na.rm=TRUE),

	n.pain_all  	= sum(targetpain > 0, na.rm=TRUE),
	n.pain_back 	= sum(backpain > 0, na.rm=TRUE),
	n.pain_neck 	= sum(neckpain > 0, na.rm=TRUE),
	n.pain_limb 	= sum(limbpain > 0, na.rm=TRUE)
), by=c('year')]

#object.size(sum_ind_combined) %>% format('Gb')
demo_ind_combined = ind_combined[!is.na(period) & targetpain > 0, lapply(.SD, sum, na.rm=TRUE), 
	.SDcols = c('n_week',
			'p.insurance_EPO','p.insurance_HMO','p.insurance_IND','p.insurance_OTH','p.insurance_POS','p.insurance_PPO',
			'p.medicare_priv','p.medicare_dual','p.medicare_lis','p.medicare_oth',
			'targetpain','backpain','neckpain','limbpain',
			'opioids','therapy','sum_opioid_days','sum_opioid_mme'), 
	by=c('PATID','female','YRDOB', 'year')]

#demo_ind_combined[, .N, by='PATID'][,table(N)]
#9255387 / (9255387 + 16469602)
demo_ind_combined = merge(demo_ind_combined, ses_data, by='PATID', all.x=TRUE)

demo_ind_combined[YRDOB > 0, age := as.integer(year - YRDOB + 1)]
demo_ind_combined[age >= 92, age := 90]
demo_ind_combined[is.na(race), race := 'unknown']

tab_demo_pain = demo_ind_combined[, .(
	n.enrolled 		= .N,
	n.weekly_visit  = sum(n_week, na.rm=TRUE), 

	n.age 			= mean(age, na.rm=TRUE),
	n.female 		= sum(female, na.rm=TRUE),
	n.race_white	= sum(race == 'White', na.rm=TRUE),
	n.race_black	= sum(race == 'Black', na.rm=TRUE),
	n.race_hispanic	= sum(race == 'Hispanic', na.rm=TRUE),
	n.race_asian	= sum(race == 'Asian', na.rm=TRUE),
	n.race_unk		= sum(race == 'unknown', na.rm=TRUE),

	p.age 			= sd(age, na.rm=TRUE),
	p.female 		= mean(female, na.rm=TRUE),
	p.race_white	= mean(race == 'White', na.rm=TRUE),
	p.race_black	= mean(race == 'Black', na.rm=TRUE),
	p.race_hispanic	= mean(race == 'Hispanic', na.rm=TRUE),
	p.race_asian	= mean(race == 'Asian', na.rm=TRUE),
	p.race_unk		= mean(race == 'unknown', na.rm=TRUE),

	p.insurance_EPO = mean(p.insurance_EPO > 0, na.rm=TRUE),
	p.insurance_HMO = mean(p.insurance_HMO > 0, na.rm=TRUE),
	p.insurance_IND = mean(p.insurance_IND > 0, na.rm=TRUE),
	p.insurance_OTH = mean(p.insurance_OTH > 0, na.rm=TRUE),
	p.insurance_POS = mean(p.insurance_POS > 0, na.rm=TRUE),
	p.insurance_PPO = mean(p.insurance_PPO > 0, na.rm=TRUE),
	p.medicare_priv = mean(p.medicare_priv > 0, na.rm=TRUE),
	p.medicare_dual = mean(p.medicare_dual > 0, na.rm=TRUE),
	p.medicare_lis  = mean(p.medicare_lis > 0, na.rm=TRUE),
	p.medicare_oth  = mean(p.medicare_oth > 0, na.rm=TRUE),
	
	p.pain_all  	= mean(targetpain > 0, na.rm=TRUE),
	p.pain_back 	= mean(backpain > 0, na.rm=TRUE),
	p.pain_neck 	= mean(neckpain > 0, na.rm=TRUE),
	p.pain_limb 	= mean(limbpain > 0, na.rm=TRUE),

	n.insurance_EPO = sum(p.insurance_EPO > 0, na.rm=TRUE),
	n.insurance_HMO = sum(p.insurance_HMO > 0, na.rm=TRUE),
	n.insurance_IND = sum(p.insurance_IND > 0, na.rm=TRUE),
	n.insurance_OTH = sum(p.insurance_OTH > 0, na.rm=TRUE),
	n.insurance_POS = sum(p.insurance_POS > 0, na.rm=TRUE),
	n.insurance_PPO = sum(p.insurance_PPO > 0, na.rm=TRUE),
	n.medicare_priv = sum(p.medicare_priv > 0, na.rm=TRUE),
	n.medicare_dual = sum(p.medicare_dual > 0, na.rm=TRUE),
	n.medicare_lis  = sum(p.medicare_lis > 0, na.rm=TRUE),
	n.medicare_oth  = sum(p.medicare_oth > 0, na.rm=TRUE),

	n.pain_all  	= sum(targetpain > 0, na.rm=TRUE),
	n.pain_back 	= sum(backpain > 0, na.rm=TRUE),
	n.pain_neck 	= sum(neckpain > 0, na.rm=TRUE),
	n.pain_limb 	= sum(limbpain > 0, na.rm=TRUE)
), by=c('year')]

format_table = function(table_s5_all){
	tab_np = data.frame(t(table_s5_all)[2:ncol(table_s5_all), ])
	colnames(tab_np) = c('2019','2020')
	tab_np$names = rownames(tab_np)
	tab_np = as.data.table(tab_np)
	tab_np[, c('stat','names') := tstrsplit(names, '\\.')]
	
	tab_np = dcast(tab_np, names ~ stat, value.var=c('2019','2020'))
	tab_np[names != 'age', `2019` := paste0(formatC(`2019_n`, big.mark=',', digits=10), ' (',round(`2019_p`*100, 1), ')')]
	tab_np[names == 'age', `2019` := paste0(round(`2019_n`,1), ' (',round(`2019_p`, 1), ')')]
	tab_np[names != 'age', `2020` := paste0(formatC(`2020_n`, big.mark=',', digits=10), ' (',round(`2020_p`*100, 1), ')')]
	tab_np[names == 'age', `2020` := paste0(round(`2020_n`,1), ' (',round(`2020_p`, 1), ')')]
	return(tab_np)
}

table_s5_all_formatted = format_table(tab_demo_all)
table_s5_pain_formatted = format_table(tab_demo_pain)

rio::export(table_s5_all_formatted, file.path(bucket,'projects','covid_opioid','table_s5_desc_all.xlsx'), overwrite = TRUE)
rio::export(table_s5_pain_formatted, file.path(bucket,'projects','covid_opioid','table_s5_desc_pain.xlsx'), overwrite = TRUE)
